<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta http-equiv="content-type" content="text/html; charset=utf-8">
    
<meta charset="UTF-8">
<title>Phonetic Matching | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="fuzzy-matching.html" title="Typoes and Mispelings">
<link rel="prev" href="fuzzy-scoring.html" title="Scoring Fuzziness">
<link rel="next" href="aggregations.html" title="Aggregations">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <script>dataLayer = [];</script><noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-58RLH5" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= '//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-58RLH5');</script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      window.dataLayer = window.dataLayer || [];
      function gtag(){dataLayer.push(arguments);}
      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      (function(){var g=function(e,h,f,g){
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-link"><a href="languages.html">Dealing with Human Language</a></span>
»
<span class="breadcrumb-link"><a href="fuzzy-matching.html">Typoes and Mispelings</a></span>
»
<span class="breadcrumb-node">Phonetic Matching</span>
</div>
<div class="navheader">
<span class="prev">
<a href="fuzzy-scoring.html">« Scoring Fuzziness</a>
</span>
<span class="next">
<a href="aggregations.html">Aggregations »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="phonetic-matching"></a>Phonetic Matching<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/270_Fuzzy_matching/60_Phonetic_matching.asciidoc">edit</a>
</h2>
</div></div></div>
<p>In a last, desperate, attempt to match something, anything, we could resort to
searching for words that sound similar, even if their spelling differs.</p>
<p>Several algorithms exist for converting words into a phonetic
representation. The <a href="http://en.wikipedia.org/wiki/Soundex" class="ulink" target="_top">Soundex</a> algorithm is
the granddaddy of them all, and most other phonetic algorithms are
improvements or specializations of Soundex, such as
<a href="http://en.wikipedia.org/wiki/Metaphone" class="ulink" target="_top">Metaphone</a> and
<a href="http://en.wikipedia.org/wiki/Metaphone#Double_Metaphone" class="ulink" target="_top">Double Metaphone</a>
(which expands phonetic matching to languages other than English),
<a href="http://en.wikipedia.org/wiki/Caverphone" class="ulink" target="_top">Caverphone</a> for matching names in New
Zealand, the
<a href="https://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex#Beider.E2.80.93Morse_Phonetic_Name_Matching_Algorithm" class="ulink" target="_top">Beider-Morse</a> algorithm, which adopts the Soundex algorithm
for better matching of German and Yiddish names, and the
<a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik" class="ulink" target="_top">Kölner Phonetik</a> for better
handling of German words.</p>
<p>The thing to take away from this list is that phonetic algorithms are fairly
crude, and very specific to the languages they were designed for, usually
either English or German.  This limits their usefulness.  Still, for certain
purposes, and in combination with other techniques, phonetic matching can be a
useful tool.</p>
<p>First, you will need to install the Phonetic Analysis plug-in from
<a href="/guide/en/elasticsearch/plugins/current/analysis-phonetic.html" class="ulink" target="_top">https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-phonetic.html</a> on every node
in the cluster, and restart each node.</p>
<p>Then, you can create a custom analyzer that uses one of the
phonetic token filters and try it out:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">PUT /my_index
{
  "settings": {
    "analysis": {
      "filter": {
        "dbl_metaphone": { <a id="CO182-1"></a><i class="conum" data-value="1"></i>
          "type":    "phonetic",
          "encoder": "double_metaphone"
        }
      },
      "analyzer": {
        "dbl_metaphone": {
          "tokenizer": "standard",
          "filter":    "dbl_metaphone" <a id="CO182-2"></a><i class="conum" data-value="2"></i>
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO182-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>First, configure a custom <code class="literal">phonetic</code> token filter that uses the
<code class="literal">double_metaphone</code> encoder.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO182-2"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>Then use the custom token filter in a custom analyzer.</p>
</td>
</tr>
</table>
</div>
<p>Now we can test it with the <code class="literal">analyze</code> API:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">GET /my_index/_analyze?analyzer=dbl_metaphone
Smith Smythe</pre>
</div>
<p>Each of <code class="literal">Smith</code> and <code class="literal">Smythe</code> produce two tokens in the same position:  <code class="literal">SM0</code>
and  <code class="literal">XMT</code>. Running <code class="literal">John</code>, <code class="literal">Jon</code>, and <code class="literal">Johnnie</code> through the analyzer will all
produce the two tokens <code class="literal">JN</code> and <code class="literal">AN</code>, while <code class="literal">Jonathon</code> results in the tokens
<code class="literal">JN0N</code> and <code class="literal">ANTN</code>.</p>
<p>The phonetic analyzer can be used just like any other analyzer. First map a
field to use it, and then index some data:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">PUT /my_index/_mapping/my_type
{
  "properties": {
    "name": {
      "type": "string",
      "fields": {
        "phonetic": { <a id="CO183-1"></a><i class="conum" data-value="1"></i>
          "type":     "string",
          "analyzer": "dbl_metaphone"
        }
      }
    }
  }
}

PUT /my_index/my_type/1
{
  "name": "John Smith"
}

PUT /my_index/my_type/2
{
  "name": "Jonnie Smythe"
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO183-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">name.phonetic</code> field uses the custom <code class="literal">dbl_metaphone</code> analyzer.</p>
</td>
</tr>
</table>
</div>
<p>The <code class="literal">match</code> query can be used for searching:</p>
<div class="pre_wrapper lang-json">
<pre class="programlisting prettyprint lang-json">GET /my_index/my_type/_search
{
  "query": {
    "match": {
      "name.phonetic": {
        "query": "Jahnnie Smeeth",
        "operator": "and"
      }
    }
  }
}</pre>
</div>
<p>This query returns both documents, demonstrating just how coarse phonetic
matching is.  Scoring with a phonetic algorithm is pretty much worthless. The
purpose of phonetic matching is not to increase precision, but to increase
recall—​to spread the net wide enough to catch any documents that might
possibly match.</p>
<p>It usually makes more sense to use phonetic algorithms when retrieving results
which will be consumed and post-processed by another computer, rather than by
human users.</p>
</div>
<div class="navfooter">
<span class="prev">
<a href="fuzzy-scoring.html">« Scoring Fuzziness</a>
</span>
<span class="next">
<a href="aggregations.html">Aggregations »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  window.initial_state = {}</script>
  </body>
</html>
